import os
import shutil

import time

import requests
from lxml import etree  # pip install lxml


def kanxiaojiejie(page):
    """Download the post images found on one listing page of the site.

    Fetches ``https://www.kanxiaojiejie.tk/page/{page}``, selects the post
    containers with XPath, skips the first two of them, and saves the first
    ``<img>`` of each remaining container as ``小姐姐/{page}-{i}.jpg``, where
    ``i`` is the 1-based position of the container among those processed.
    The path of every saved file is printed.

    Containers without an ``<img>`` and images that fail to download are
    reported and skipped, so one bad entry does not abort the whole page.

    Args:
        page: Page number interpolated into the listing URL and file names.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error, timeout, or HTTP error status).
    """
    # 1. Fetch the HTML source of the listing page.
    url = f"https://www.kanxiaojiejie.tk/page/{page}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78'
    }
    # Without a timeout a stalled connection would block forever.
    timeout = 10
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were the listing.
    response.raise_for_status()

    # 2. Parse the source and collect the post containers.
    tree = etree.HTML(response.text)
    div_list = tree.xpath('//div[@class="gridsoul-posts-content"]/div/div')

    # The function may be called directly, so do not rely on the caller
    # having created the output folder.
    os.makedirs('小姐姐', exist_ok=True)

    # The first two matches are skipped (presumably they are not image
    # posts -- TODO confirm against the live page markup).
    for i, div in enumerate(div_list[2:], start=1):
        # '.' makes the search relative to the current container.
        sources = div.xpath('.//img/@src')
        if not sources:
            print(f'{page}-{i}: no <img> found, skipped')
            continue
        img = sources[0]

        # 3. Download and save the image bytes.
        try:
            img_response = requests.get(img, timeout=timeout)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'{page}-{i}: download failed ({exc}), skipped')
            continue

        path = f'小姐姐/{page}-{i}.jpg'
        with open(path, 'wb') as f:
            f.write(img_response.content)
        print(path)
        # Pause between downloads to go easy on the server; done after the
        # file is closed so the handle is not held open while sleeping.
        time.sleep(1)

if __name__ == '__main__':
    # Make sure the output folder exists; exist_ok avoids the race between
    # a separate existence check and the creation call.
    os.makedirs('小姐姐', exist_ok=True)

    # Scrape listing pages 1 through 4. The loop lives inside the guard so
    # that importing this module does not start any downloads.
    for page in range(1, 5):
        kanxiaojiejie(page)


"""
//*[@id="wrapper"]
"""
